# -*- coding:utf-8 -*-
'''爬取中文社区部分内容'''
__author__="Bwj"

from bs4 import BeautifulSoup
from urllib import request

# Listing pages to scrape: the index page, then pages 2 through 19.
url = 'http://www.pythontab.com/html/pythonhexinbiancheng/index.html'
url_list = [url] + [
    'http://www.pythontab.com/html/pythonhexinbiancheng/%s.html' % page
    for page in range(2, 20)
]

def siper():
    """Scrape every article from the listing pages in ``url_list`` and save
    each article's paragraph text to its own file under E:\\python\\sp_file.

    For each listing page, every ``#catlist > li > a`` anchor supplies both
    the article title (link text) and its URL (href). Each article page is
    fetched exactly once and its ``div.content > p`` paragraphs are written,
    UTF-8 encoded, to "<sanitized title>.txt".

    Side effects: network requests, console output, file writes. Returns None.
    """
    for page_url in url_list:
        # Context manager ensures the HTTP response is closed after reading.
        with request.urlopen(page_url) as response:
            soup = BeautifulSoup(response.read(), 'html.parser')

        # One selector pass: each anchor carries both title text and href.
        for anchor in soup.select('#catlist > li > a'):
            title = anchor.get_text()
            link = anchor.get('href')

            # Fetch this article once (the original re-downloaded every
            # previously seen link on each listing page).
            with request.urlopen(link) as response2:
                article_soup = BeautifulSoup(response2.read(), 'html.parser')
            paragraphs = [p.get_text().encode('utf-8')
                          for p in article_soup.select('div.content > p')]

            # Strip/replace characters that are illegal in Windows filenames.
            safe_title = (title.replace('*', '')
                               .replace('/', 'or')
                               .replace('"', '')
                               .replace('?', 'wenhao')
                               .replace(':', ''))
            print(safe_title)

            # One output file per article, written as raw UTF-8 bytes.
            with open(r"E:\python\sp_file\%s.txt" % safe_title, 'wb') as f:
                for chunk in paragraphs:
                    f.write(chunk)

# Run the scraper only when executed directly as a script.
if __name__ == '__main__':
    siper()




